knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

library(tidyverse)
library(rlang)
library(viridis)

# Default look for every plot: minimal theme with the legend on the right.
theme_set(theme_minimal() + theme(legend.position = "right"))

# Use viridis for ggplot2's continuous colour and fill scales.
options(
  ggplot2.continuous.fill = "viridis",
  ggplot2.continuous.colour = "viridis"
)

# Define global aliases so the names scale_colour_discrete and
# scale_fill_discrete refer to the viridis discrete scales.
scale_colour_discrete <- scale_colour_viridis_d
scale_fill_discrete <- scale_fill_viridis_d
# Read the cleaned diabetes data from disk; the cleaning step and the
# demographic, comorbidity and factor tables are all built from this tibble.
diabetes_df <- read_csv(file = "data/cleaned_diabetes_data.csv")

# Binary helper: convert a "No"/"Yes" vector into a factor whose levels are
# always in the order No, Yes. Any other value (including NA) becomes NA.
# Note: this shares its name with dplyr::recode_factor(); defining it here
# masks that function for the rest of the script.
recode_factor <- function(var) {
  binary_levels <- c("No", "Yes")
  factor(var, levels = binary_levels)
}

# Clean the diabetes variables: convert the raw columns to factors with an
# explicit level order so tables and plots list the categories consistently.
# Values that do not match a listed level become NA.
diabetes_df <-
  diabetes_df |>
  mutate(
    has_diabetes = factor(
      has_diabetes,
      levels = c("Not diabetic", "Pre-diabetic", "Diabetic", "Gestational")
    ),
    # Order the levels by how often each value occurs, most frequent first.
    sex_at_birth = fct_infreq(sex_at_birth),
    # Age bands are ordinal. `ordered` is spelled out in full instead of
    # relying on partial argument matching of `order`.
    age_category = factor(
      age_category,
      levels = c(
        "18-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54",
        "55-59", "60-64", "65-69", "70-74", "75-79", "80+"
      ),
      ordered = TRUE
    ),
    pregnant = recode_factor(pregnant),
    eval_type = factor(
      eval_type,
      levels = c("Not diabetic", "Type 1", "Type 2")
    ),
    # The raw codes 1 and 2 are relabelled as "Type 1" and "Type 2".
    diab_type = factor(
      diab_type,
      levels = c(1, 2),
      labels = c("Type 1", "Type 2")
    )
  )

Defining Diabetes

There are three variables that we are interested in understanding – has_diabetes, diab_type, and eval_type.

has_diabetes: This variable records each respondent’s answer to the question, “Have you ever been told you had diabetes?” Overall, 432,339 people responded to this question.

diab_type: This variable comes directly from the dataset, where respondents who were diabetic per the has_diabetes question were asked, “What type of diabetes do you have?” Only 22,027 of the 59,786 people who responded that they had diabetes answered this question. Because more than 60% of these respondents are unaccounted for, this measure may not tell us enough about risk factors or comorbidities of T2D in the general population.

eval_type: Because so much of diab_type is missing, we created another variable, eval_type, that infers each person’s diabetes type from other demographic information in this dataset. We followed this paper published by the CDC, which used the 2014 edition of the same BRFSS survey to classify T2D diagnoses. In that paper, a survey respondent was classified as having type II diabetes if the respondent was older than 30, not pregnant, and answered yes to the question “Have you ever been told you have diabetes?”

We then classified a respondent as having type II diabetes if the respondent was older than 30, not pregnant, and diabetic as per the has_diabetes question. We classified a respondent as having type I diabetes if the respondent was younger than 30 and diabetic as per the has_diabetes question.

Diabetes Status (has_diabetes)

# Frequency table for one column.
#
# Arguments:
#   df   Data frame to summarise.
#   var  Unquoted column name to count by.
#
# Returns a knitr::kable() table with one row per distinct value of `var`
# and the number of rows holding that value in a `count` column.
diabetes_dist <- function(df, var) {
  by_value <- group_by(df, {{ var }})
  frequencies <- summarize(by_value, count = n())
  knitr::kable(frequencies)
}

# Bar chart of how many rows of `df` fall into each category of `var`,
# with the count printed above every bar.
#
# Arguments:
#   df   Data frame to plot. The original body ignored this argument and
#        always plotted the global `diabetes_df`; it now plots `df`.
#   var  Unquoted column name used for the x axis and the bar fill.
#
# Returns a ggplot object.
diabetes_plot <- function(df, var) {
  # Column name as text for the fill label. The legend title itself is
  # blanked by the theme() call at the end.
  fill_label <- as.character(rlang::ensym(var))

  df |>
    ggplot(aes(x = {{ var }})) +
    geom_bar(aes(fill = factor({{ var }})), color = "black", na.rm = FALSE) +
    geom_text(
      stat = "count",  # Reuse the count statistic that sizes the bars
      # after_stat() replaces the deprecated ..count.. notation
      aes(label = after_stat(count)),
      vjust = -0.5,  # Place each label just above its bar
      na.rm = FALSE
    ) +
    labs(
      title = "Distribution of Diabetes",
      x = "Diabetes Status",
      y = "Count",
      fill = fill_label
    ) +
    theme_minimal() +
    theme(legend.position = "bottom", legend.title = element_blank())
}

Table

diabetes_dist(df = diabetes_df, var = has_diabetes)
has_diabetes count
Not diabetic 358706
Pre-diabetic 10594
Diabetic 59786
Gestational 3253
NA 984

Graph

diabetes_plot(df = diabetes_df, var = has_diabetes)

Reported Type (diab_type)

Table

diabetes_dist(df = diabetes_df, var = diab_type)
diab_type count
Type 1 1958
Type 2 20069
NA 411296

Graph

diabetes_plot(df = diabetes_df, var = diab_type)

Evaluated Type (eval_type)

Table

diabetes_dist(df = diabetes_df, var = eval_type)
eval_type count
Not diabetic 372797
Type 1 664
Type 2 59007
NA 855

Graph

diabetes_plot(df = diabetes_df, var = eval_type)

Demographics

# Demographic view of the data: recode the demographic columns as factors,
# then keep them alongside the three diabetes variables under display names.
# Those display names are what comorbidities_plot() receives as strings.
# NOTE(review): the income levels "15K=25K" and "25-35K" are spelled
# differently from the other bands. A value that does not match a level
# exactly becomes NA, so confirm these against the values in the CSV.
diabetes_demographics <-
  diabetes_df |>
  mutate(
    good_health = recode_factor(good_health),
    physical_activity = factor(
      physical_activity,
      levels = c("Highly active", "Active", "Insufficiently active", "Inactive")
    ),
    urb_rural = factor(urb_rural, levels = c("Urban", "Rural")),
    race = as.factor(race),
    income = factor(
      income,
      levels = c(
        "Less than 15K", "15K=25K", "25-35K", "35K-50K",
        "50K-100K", "100K-200K", "More than 200K"
      )
    ),
    education = factor(
      education,
      levels = c(
        "Kindergarten or less", "Elementary", "Some high school",
        "High school graduate", "Some college or technical school",
        "College graduate"
      )
    )
  ) |>
  # Select and rename in one step; column order matches the original select().
  select(
    `Has Diabetes` = has_diabetes,
    `Reported Type` = diab_type,
    `Evaluated Type` = eval_type,
    `In Good Health` = good_health,
    `Level of Physical Activity` = physical_activity,
    `Urban/Rural Status` = urb_rural,
    `Race/Ethnicity` = race,
    `Income Level` = income,
    `Educational Attainment` = education
  )

This section explores the distribution of demographic information among diabetes diagnoses.

  • Whether the respondent is in good health
  • Level of physical activity
  • Living in an urban vs. rural area
  • Race/Ethnicity
  • Income level
  • Educational attainment
# Plot, for each diabetes category, the percentage of respondents in each
# category of a second variable, as dodged bars with percentage labels.
#
# Arguments:
#   df           Data frame holding both columns.
#   comorbidity  Column name (single string) of the variable whose categories
#                are compared; also used as the legend title.
#   diabetes     Column name (single string) of the diabetes variable shown
#                on the x axis.
#
# Rows with NA in either column are dropped before counting, so the
# percentages within each diabetes category sum to 100.
#
# Returns a ggplot object. Stops with an error if either name is not a
# single string or is not a column of `df`.
comorbidities_plot <- function(df, comorbidity, diabetes) {
  stopifnot(
    is.character(comorbidity), length(comorbidity) == 1L,
    is.character(diabetes), length(diabetes) == 1L
  )

  # Fail early with a readable message instead of a tidy-eval lookup error.
  unknown <- setdiff(c(comorbidity, diabetes), names(df))
  if (length(unknown) > 0) {
    stop(
      "Column(s) not found in `df`: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  # Convert character strings to symbols
  comorbidity_sym <- rlang::sym(comorbidity)
  diabetes_sym <- rlang::sym(diabetes)

  percent_df <-
    df |>
    filter(!is.na(!!comorbidity_sym), !is.na(!!diabetes_sym)) |>
    group_by(!!comorbidity_sym, !!diabetes_sym) |>
    summarize(n = n(), .groups = "drop") |>
    # Percent is each cell's share of its diabetes category.
    group_by(!!diabetes_sym) |>
    mutate(Percent = n / sum(n) * 100) |>
    ungroup()

  percent_df |>
    ggplot(aes(x = !!diabetes_sym, y = Percent, fill = !!comorbidity_sym)) +
    geom_col(position = "dodge") +
    geom_text(
      aes(label = sprintf("%.1f", Percent)),
      # Same dodge width as the bars so each label sits over its own bar.
      position = position_dodge(width = 0.9),
      size = 2.5,
      vjust = -0.5
    ) +
    labs(
      title = str_c(diabetes, " by ", comorbidity),
      x = diabetes,
      y = "Percent (%)",
      fill = comorbidity
    ) +
    scale_fill_viridis_d(na.value = "gray50", option = "D") +
    theme_minimal() +
    theme(legend.position = "bottom")
}

Good Health

demo = "In Good Health"

has_diabetes

comorbidities_plot(diabetes_demographics, demo, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_demographics, demo, "Reported Type")

eval_type

comorbidities_plot(diabetes_demographics, demo, "Evaluated Type")

Physical Activity

demo = "Level of Physical Activity"

has_diabetes

comorbidities_plot(diabetes_demographics, demo, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_demographics, demo, "Reported Type")

eval_type

comorbidities_plot(diabetes_demographics, demo, "Evaluated Type")

Urban/Rural

demo = "Urban/Rural Status"

has_diabetes

comorbidities_plot(diabetes_demographics, demo, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_demographics, demo, "Reported Type")

eval_type

comorbidities_plot(diabetes_demographics, demo, "Evaluated Type")

Race/Ethnicity

demo = "Race/Ethnicity"

has_diabetes

comorbidities_plot(diabetes_demographics, demo, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_demographics, demo, "Reported Type")

eval_type

comorbidities_plot(diabetes_demographics, demo, "Evaluated Type")

Income

demo = "Income Level"

has_diabetes

comorbidities_plot(diabetes_demographics, demo, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_demographics, demo, "Reported Type")

eval_type

comorbidities_plot(diabetes_demographics, demo, "Evaluated Type")

Education

demo = "Educational Attainment"

has_diabetes

comorbidities_plot(diabetes_demographics, demo, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_demographics, demo, "Reported Type")

eval_type

comorbidities_plot(diabetes_demographics, demo, "Evaluated Type")

Comorbidities

# Comorbidity view of the data: recode every comorbidity column as a
# No/Yes factor with recode_factor(), then keep those columns alongside the
# three diabetes variables under display names.
diabetes_comorbidities <-
  diabetes_df |>
  mutate(
    across(
      c(
        kidney_disease, depression, arthritis, michd, blind,
        covid, stroke, bronchitis, asthma_ever, asthma_now
      ),
      recode_factor
    )
  ) |>
  # Select and rename in one step; column order matches the original select().
  select(
    `Has Diabetes` = has_diabetes,
    `Reported Type` = diab_type,
    `Evaluated Type` = eval_type,
    `History of Kidney Disease` = kidney_disease,
    `History of Arthritis` = arthritis,
    `History of Depression` = depression,
    `History of MI/CHD` = michd,
    `Blindness` = blind,
    `Ever Tested Positive for Covid-19` = covid,
    `History of Stroke` = stroke,
    `History of Bronchitis` = bronchitis,
    `History of Asthma` = asthma_ever,
    `Currently Have Asthma` = asthma_now
  )

This section explores the distribution of co-morbidities across diabetes diagnoses.

  • History of kidney disease
  • History of depression
  • Blindness
  • History of myocardial infarction (MI) or coronary heart disease (CHD)
  • History of stroke
  • History of arthritis
  • History of asthma
  • Currently has asthma
  • Ever tested positive for COVID-19
  • History of bronchitis

Kidney disease

comorbidity = "History of Kidney Disease"

has_diabetes

comorbidities_plot(diabetes_comorbidities, comorbidity, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Evaluated Type")

Depression

comorbidity = "History of Depression"

has_diabetes

comorbidities_plot(diabetes_comorbidities, comorbidity, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Evaluated Type")

Blindness

comorbidity = "Blindness"

has_diabetes

comorbidities_plot(diabetes_comorbidities, comorbidity, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Evaluated Type")

MI/CHD

comorbidity = "History of MI/CHD"

has_diabetes

comorbidities_plot(diabetes_comorbidities, comorbidity, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Evaluated Type")

Stroke

comorbidity = "History of Stroke"

has_diabetes

comorbidities_plot(diabetes_comorbidities, comorbidity, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Evaluated Type")

Arthritis

comorbidity = "History of Arthritis"

has_diabetes

comorbidities_plot(diabetes_comorbidities, comorbidity, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Evaluated Type")

Asthma

has_diabetes

comorbidities_plot(diabetes_comorbidities, "History of Asthma", "Has Diabetes")

comorbidities_plot(diabetes_comorbidities, "Currently Have Asthma", "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, "History of Asthma", "Reported Type")

comorbidities_plot(diabetes_comorbidities, "Currently Have Asthma", "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, "History of Asthma", "Evaluated Type")

comorbidities_plot(diabetes_comorbidities, "Currently Have Asthma", "Evaluated Type")

COVID-19

comorbidity = "Ever Tested Positive for Covid-19"

has_diabetes

comorbidities_plot(diabetes_comorbidities, comorbidity, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Evaluated Type")

Bronchitis

comorbidity = "History of Bronchitis"

has_diabetes

comorbidities_plot(diabetes_comorbidities, comorbidity, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Reported Type")

eval_type

comorbidities_plot(diabetes_comorbidities, comorbidity, "Evaluated Type")

Associated Factors

# Associated-factors view of the data: recode the factor columns, then keep
# them alongside the three diabetes variables under display names.
# NOTE(review): `chol_check` is displayed as "High Cholesterol"; the column
# name suggests it may record whether cholesterol was checked. Confirm the
# label against the data dictionary.
diabetes_factors <-
  diabetes_df |>
  mutate(
    # a1c_check is coded 0/1 in the data; map it to "No"/"Yes" before
    # converting it to a factor like the other binary columns.
    a1c_check = recode_factor(
      case_match(a1c_check, 0 ~ "No", 1 ~ "Yes", NA ~ NA)
    ),
    across(
      c(
        high_bp, chol_meds, chol_check, ecigs,
        obese, cancer, heavy_drinking, binge_drinking
      ),
      recode_factor
    ),
    smoker = factor(
      smoker,
      levels = c(
        "Current smoker (every day)",
        "Current smoker (some days)",
        "Former smoker",
        "Never smoked"
      )
    ),
    prediabetic = factor(prediabetic, levels = c("Yes", "No", "Gestational"))
  ) |>
  # Select and rename in one step; column order matches the original select().
  select(
    `High Blood Pressure` = high_bp,
    `Checked A1C in the Past Year` = a1c_check,
    `Meds Usage for Cholesterol` = chol_meds,
    `High Cholesterol` = chol_check,
    `Smoking Status` = smoker,
    `E-Cigarette Use` = ecigs,
    `Prediabetes Diagnosis` = prediabetic,
    `Overweight/Obese (Computed)` = obese,
    `Cancer Diagnosis` = cancer,
    `Heavy Drinking` = heavy_drinking,
    `Binge Drinking` = binge_drinking,
    `Has Diabetes` = has_diabetes,
    `Reported Type` = diab_type,
    `Evaluated Type` = eval_type
  )

This section explores the distribution of related factors across diabetes diagnoses.

  • High blood pressure
  • High blood sugar
  • High cholesterol
  • Checked for A1C over the past year
  • Smoking status
  • Ever received a prediabetes diagnosis
  • Overweight/obese
  • Ever received a cancer diagnosis
  • Drinking behavior

High BP

factor = "High Blood Pressure"

has_diabetes

comorbidities_plot(diabetes_factors, factor, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_factors, factor, "Reported Type")

eval_type

comorbidities_plot(diabetes_factors, factor, "Evaluated Type")

High Cholesterol

has_diabetes

comorbidities_plot(diabetes_factors, "High Cholesterol", "Has Diabetes")

comorbidities_plot(diabetes_factors, "Meds Usage for Cholesterol", "Has Diabetes")

diab_type

comorbidities_plot(diabetes_factors, "High Cholesterol", "Reported Type")

comorbidities_plot(diabetes_factors, "Meds Usage for Cholesterol", "Reported Type")

eval_type

comorbidities_plot(diabetes_factors, "High Cholesterol", "Evaluated Type")

comorbidities_plot(diabetes_factors, "Meds Usage for Cholesterol", "Evaluated Type")

Smoker

has_diabetes

comorbidities_plot(diabetes_factors, "Smoking Status", "Has Diabetes")

comorbidities_plot(diabetes_factors, "E-Cigarette Use", "Has Diabetes")

diab_type

comorbidities_plot(diabetes_factors, "Smoking Status", "Reported Type")

comorbidities_plot(diabetes_factors, "E-Cigarette Use", "Reported Type")

eval_type

comorbidities_plot(diabetes_factors, "Smoking Status", "Evaluated Type")

comorbidities_plot(diabetes_factors, "E-Cigarette Use", "Evaluated Type")

Obesity

factor = "Overweight/Obese (Computed)"

has_diabetes

comorbidities_plot(diabetes_factors, factor, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_factors, factor, "Reported Type")

eval_type

comorbidities_plot(diabetes_factors, factor, "Evaluated Type")

Cancer

factor = "Cancer Diagnosis"

has_diabetes

comorbidities_plot(diabetes_factors, factor, "Has Diabetes")

diab_type

comorbidities_plot(diabetes_factors, factor, "Reported Type")

eval_type

comorbidities_plot(diabetes_factors, factor, "Evaluated Type")

Drinking

has_diabetes

comorbidities_plot(diabetes_factors, "Heavy Drinking", "Has Diabetes")

comorbidities_plot(diabetes_factors, "Binge Drinking", "Has Diabetes")

diab_type

comorbidities_plot(diabetes_factors, "Heavy Drinking", "Reported Type")

comorbidities_plot(diabetes_factors, "Binge Drinking", "Reported Type")

eval_type

comorbidities_plot(diabetes_factors, "Heavy Drinking", "Evaluated Type")

comorbidities_plot(diabetes_factors, "Binge Drinking", "Evaluated Type")